In [1]:
# Basic Packages
from __future__ import division
import os
from datetime import datetime

# Web & file access
import requests
import io

# Import display options for showing websites
from IPython.display import IFrame, HTML
In [2]:
# Plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

%pylab --no-import-all
%matplotlib inline

import seaborn as sns
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")

import plotly.express as px
import plotly.graph_objects as go

from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap
# Next line can import all of plotnine, but may overwrite things? Better import each function/object you need
#from plotnine import *
Using matplotlib backend: <object object at 0x112820650>
%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib
In [3]:
# Data
import pandas as pd
import numpy as np
from pandas_datareader import data, wb

# GIS & maps
import geopandas as gpd
gp = gpd
import georasters as gr
import geoplot as gplt
import geoplot.crs as gcrs
import mapclassify as mc
import textwrap
In [4]:
# Data Munging
from itertools import product, combinations
import difflib
import pycountry
import geocoder
from geonamescache.mappers import country
mapper = country(from_key='name', to_key='iso3')
mapper2 = country(from_key='iso3', to_key='iso')
mapper3 = country(from_key='iso3', to_key='name')

# Regressions & Stats
from scipy.stats import norm
import statsmodels.formula.api as smf
#from stargazer.stargazer import Stargazer, LineLocation
In [5]:
# Paths
pathout = './data/'

if not os.path.exists(pathout):
    os.mkdir(pathout)
    
pathgraphs = './graphs/'
if not os.path.exists(pathgraphs):
    os.mkdir(pathgraphs)
In [55]:
def my_xy_plot(dfin, 
               x='SP.POP.GROW', 
               y='ln_gdp_pc', 
               labelvar='iso3c', 
               dx=0.006125, 
               dy=0.006125, 
               xlogscale=False, 
               ylogscale=False,
               xlabel='Growth Rate of Population', 
               ylabel='Log[Income per capita in ' +  str(year) + ']',
               labels=False,
               xpct = False,
               ypct = False,
               OLS=False,
               OLSlinelabel='OLS',
               ssline=False,
               sslinelabel='45 Degree Line',
               filename='income-pop-growth.pdf',
               hue='region',
               hue_order=['East Asia & Pacific', 'Europe & Central Asia',
                          'Latin America & Caribbean ', 'Middle East & North Africa',
                          'North America', 'South Asia', 'Sub-Saharan Africa '],
               style='incomeLevel', 
               style_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'],
               palette=None,
               size=None,
               sizes=None,
               legend_fontsize=10,
               label_font_size=12,
               save=True):
    '''
    Plot the association between x and var in dataframe using labelvar for labels.
    '''
    sns.set(rc={'figure.figsize':(11.7,8.27)})
    sns.set_context("talk")
    df = dfin.copy()
    df = df.dropna(subset=[x, y]).reset_index(drop=True)
    # Plot
    k = 0
    fig, ax = plt.subplots()
    sns.scatterplot(x=x, y=y, data=df, ax=ax, 
                    hue=hue,
                    hue_order=hue_order,
                    #hue='incomeLevel',
                    #hue_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'],
                    #hue_order=['East Asia & Pacific', 'Europe & Central Asia',
                    #           'Latin America & Caribbean ', 'Middle East & North Africa',
                    #           'North America', 'South Asia', 'Sub-Saharan Africa '],
                    alpha=1, 
                    style=style, 
                    style_order=style_order,
                    palette=palette,
                    size=size,
                    sizes=sizes,
                    #palette=sns.color_palette("Blues_r", df[hue].unique().shape[0]+6)[:df[hue].unique().shape[0]*2:2],
                )
    if OLS:
        sns.regplot(x=x, y=y, data=df, ax=ax, label=OLSlinelabel, scatter=False)
    if ssline:
        ax.plot([df[x].min()*.99, df[x].max()*1.01], [df[x].min()*.99, df[x].max()*1.01], c='r', label=sslinelabel)
    if labels:
        movex = df[x].mean() * dx
        movey = df[y].mean() * dy
        for line in range(0,df.shape[0]):
            ax.text(df[x][line]+movex, df[y][line]+movey, df[labelvar][line], horizontalalignment='left', fontsize=label_font_size, color='black')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    if xpct:
        fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
        xticks = mtick.FormatStrFormatter(fmt)
        ax.xaxis.set_major_formatter(xticks)
    if ypct:
        fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
        yticks = mtick.FormatStrFormatter(fmt)
        ax.yaxis.set_major_formatter(yticks)
    if ylogscale:
        ax.set(yscale="log")
    if xlogscale:
        ax.set(xscale="log")
    handles, labels = ax.get_legend_handles_labels()
    handles = np.array(handles)
    labels = np.array(labels)
    handles = list(handles[(labels!=hue) & (labels!=style) & (labels!=size)])
    labels = list(labels[(labels!=hue) & (labels!=style) & (labels!=size)])
    ax.legend(handles=handles, labels=labels, fontsize=legend_fontsize)
    if save:
        plt.savefig(pathgraphs + filename, dpi=300, bbox_inches='tight')
    return fig
In [6]:
currentYear = datetime.now().year
year = min(2020, currentYear-2)
Exercise 1: Get WDI data on patent applications by residents and non-residents in each country. Create a new variable that shows the total patents for each country.
In [67]:
url = 'https://data.worldbank.org/share/widget?indicators=IP.PAT.RESD'
IFrame(url, width=500, height=300)
Out[67]:
In [66]:
url = 'https://data.worldbank.org/share/widget?indicators=IP.PAT.NRES'
IFrame(url, width=500, height=300)
Out[66]:
In [157]:
url = 'http://databank.worldbank.org/data/reports.aspx?source=2&series=IP.PAT.RESD&country='

header = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}

r = requests.get(url, headers=header)

Residents = pd.read_html(r.text)
In [158]:
Residents[19].columns = ['Country', 1990, 2000, 2012, 2013, 2014, 2015,
                         2016, 2017, 2018, 2019, 2020, 2021, 2022]
Residents[19].head()
residents = Residents[19].iloc[1: , :]
residents.head()
Out[158]:
Country 1990 2000 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
1 Afghanistan .. .. .. .. .. .. .. .. .. .. .. .. NaN
2 Albania .. .. .. .. 10.0 14.0 20.0 16.0 15.0 4.0 .. .. NaN
3 Algeria 6.0 32.0 119.0 118.0 94.0 89.0 106.0 149.0 152.0 113.0 163.0 .. NaN
4 American Samoa .. .. .. .. .. .. .. .. .. .. .. .. NaN
5 Andorra .. .. .. .. .. .. .. .. 1.0 3.0 3.0 .. NaN
In [159]:
url = 'http://databank.worldbank.org/data/reports.aspx?source=2&series=IP.PAT.NRES&country='

header = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}

r = requests.get(url, headers=header)

Nonresidents = pd.read_html(r.text)
In [160]:
Nonresidents[19].columns = ['Country', 1990, 2000, 2012, 2013, 2014, 2015,
                         2016, 2017, 2018, 2019, 2020, 2021, 2022]
Nonresidents[19].head()
nonresidents = Nonresidents[19].iloc[1: , :]
nonresidents.head()
Out[160]:
Country 1990 2000 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
1 Afghanistan .. .. .. .. .. .. .. .. .. .. .. .. NaN
2 Albania .. .. .. 4.0 3.0 5.0 5.0 8.0 3.0 1.0 .. .. NaN
3 Algeria 229.0 127.0 781.0 722.0 719.0 716.0 566.0 594.0 521.0 525.0 547.0 .. NaN
4 American Samoa .. .. .. .. .. .. .. .. .. .. .. .. NaN
5 Andorra .. .. .. .. .. .. 3.0 6.0 10.0 12.0 5.0 .. NaN
In [ ]:
# Part 1 done
In [161]:
url = 'https://pandas-datareader.readthedocs.io/en/latest/remote_data.html#remote-data-wb'
# IFrame(url, width=800, height=400)
In [162]:
wbcountries = wb.get_countries()
wbcountries.head()
Out[162]:
iso3c iso2c name region adminregion incomeLevel lendingType capitalCity longitude latitude
0 ABW AW Aruba Latin America & Caribbean High income Not classified Oranjestad -70.0167 12.5167
1 AFE ZH Africa Eastern and Southern Aggregates Aggregates Aggregates NaN NaN
2 AFG AF Afghanistan South Asia South Asia Low income IDA Kabul 69.1761 34.5228
3 AFR A9 Africa Aggregates Aggregates Aggregates NaN NaN
4 AFW ZI Africa Western and Central Aggregates Aggregates Aggregates NaN NaN
In [169]:
url = 'https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(PPP)_per_capita'
IFrame(url, width=800, height=400)

gdppc_wiki = pd.read_html(url, encoding='utf-8')[1]
gdppc_wiki

gdppc_wiki.columns = ['Country/Territory', 'UN Region', 'gdppc_IMF', 'year_IMF',
                      'gdppc_WB', 'year_WB', 'gdppc_CIA', 'year_CIA']
gdppc_wiki.head()

gdppc_wiki['Country/Territory'] = gdppc_wiki['Country/Territory'].str.replace('*', '', regex=True).str.strip()

for c in gdppc_wiki.columns[2:-1]:
    if gdppc_wiki[c].dtype=='O':
        gdppc_wiki[c] = pd.to_numeric(gdppc_wiki[c].str.replace('—', 'nan'), errors='coerce')
        if c.startswith('year'):
            gdppc_wiki[c] = gdppc_wiki[c].astype('Int64')
gdppc_wiki.columns = ['Country', 'UN Region', 'gdppc_IMF', 'year_IMF', 'gdppc_WB', 'year_WB', 
                      'gdppc_CIA', 'year_CIA']
gdppc_wiki.head()
Out[169]:
Country UN Region gdppc_IMF year_IMF gdppc_WB year_WB gdppc_CIA year_CIA
0 Luxembourg Europe 141587.0 2022 134754.0 2021 110300 2020
1 Liechtenstein Europe NaN <NA> NaN <NA> 139100 2009
2 Singapore Asia 131426.0 2022 116487.0 2021 93400 2020
3 Ireland Europe 131034.0 2022 106456.0 2021 89700 2020
4 Monaco Europe NaN <NA> NaN <NA> 115700 2015
In [174]:
patents_merged = residents.merge(nonresidents, left_on='Country', right_on='Country')
patents_merged.head()
Out[174]:
Country 1990_x 2000_x 2012_x 2013_x 2014_x 2015_x 2016_x 2017_x 2018_x ... 2013_y 2014_y 2015_y 2016_y 2017_y 2018_y 2019_y 2020_y 2021_y 2022_y
0 Afghanistan .. .. .. .. .. .. .. .. .. ... .. .. .. .. .. .. .. .. .. NaN
1 Albania .. .. .. .. 10.0 14.0 20.0 16.0 15.0 ... 4.0 3.0 5.0 5.0 8.0 3.0 1.0 .. .. NaN
2 Algeria 6.0 32.0 119.0 118.0 94.0 89.0 106.0 149.0 152.0 ... 722.0 719.0 716.0 566.0 594.0 521.0 525.0 547.0 .. NaN
3 American Samoa .. .. .. .. .. .. .. .. .. ... .. .. .. .. .. .. .. .. .. NaN
4 Andorra .. .. .. .. .. .. .. .. 1.0 ... .. .. .. 3.0 6.0 10.0 12.0 5.0 .. NaN

5 rows × 27 columns

In [211]:
full_merged = patents_merged.merge(wdi, left_on='Country', right_on='name')
full_merged.head()
Out[211]:
Country 1990_x 2000_x 2012_x 2013_x 2014_x 2015_x 2016_x 2017_x 2018_x ... NY.GDP.PCAP.PP.KD NY.GDP.PCAP.KD SL.GDP.PCAP.EM.KD SP.POP.GROW SP.POP.TOTL SP.DYN.WFRT SP.DYN.TFRT.IN gdp_pc ln_gdp_pc ln_pop
0 Afghanistan .. .. .. .. .. .. .. .. .. ... 1970.560169 529.741210 9226.547100 2.303812 38928341.0 NaN 4.176 1970.560169 7.586073 17.477233
1 Afghanistan .. .. .. .. .. .. .. .. .. ... 2065.036235 555.138996 8522.606006 2.313073 38041757.0 NaN 4.321 2065.036235 7.632903 17.454195
2 Afghanistan .. .. .. .. .. .. .. .. .. ... 2033.804389 546.743010 8490.085702 2.384309 37171922.0 NaN 4.473 2033.804389 7.617663 17.431064
3 Afghanistan .. .. .. .. .. .. .. .. .. ... 2058.400221 553.355052 8698.017169 2.547833 36296111.0 NaN 4.633 2058.400221 7.629684 17.407221
4 Afghanistan .. .. .. .. .. .. .. .. .. ... 2057.067978 552.996908 8795.234080 2.778035 35383028.0 NaN 4.800 2057.067978 7.629037 17.381743

5 rows × 49 columns

In [ ]:
 
In [ ]:
 
In [202]:
wbcountries = wb.get_countries()
wbcountries = wbcountries.loc[wbcountries.region.isin(['Aggregates'])==False].reset_index(drop=True)
wbcountries['name'] = wbcountries.name.str.strip()
wbcountries['incomeLevel'] = wbcountries['incomeLevel'].str.title()
wbcountries.loc[wbcountries.iso3c=='VEN', 'incomeLevel'] = 'Upper Middle Income'
In [203]:
wdi_indicators = ['NY.GDP.PCAP.PP.KD', 'NY.GDP.PCAP.KD', 'SL.GDP.PCAP.EM.KD', 'SP.POP.GROW', 'SP.POP.TOTL', 'SP.DYN.WFRT', 'SP.DYN.TFRT.IN']
In [204]:
popvars = wb.search(string='population')
popvars.head()
Out[204]:
id name unit source sourceNote sourceOrganization topics
24 1.1_ACCESS.ELECTRICITY.TOT Access to electricity (% of total population) Sustainable Energy for All Access to electricity is the percentage of pop... b'World Bank Global Electrification Database 2...
39 1.2_ACCESS.ELECTRICITY.RURAL Access to electricity (% of rural population) Sustainable Energy for All Access to electricity is the percentage of rur... b'World Bank Global Electrification Database 2...
40 1.3_ACCESS.ELECTRICITY.URBAN Access to electricity (% of urban population) Sustainable Energy for All Access to electricity is the percentage of tot... b'World Bank Global Electrification Database 2...
164 2.1_ACCESS.CFT.TOT Access to Clean Fuels and Technologies for coo... Sustainable Energy for All b''
195 3.11.01.01.popcen Population census Statistical Capacity Indicators Population censuses collect data on the size, ... b'World Bank Microdata library. Original sourc...
In [205]:
wdi = wb.download(indicator=wdi_indicators, country='all', start=1950, end=year)
wdi = wdi.reset_index()
wdi['year'] = wdi.year.astype(int)
wdi['gdp_pc'] = wdi['NY.GDP.PCAP.PP.KD']
wdi['ln_gdp_pc'] = wdi['NY.GDP.PCAP.PP.KD'].apply(np.log)
wdi['ln_pop'] = wdi['SP.POP.TOTL'].apply(np.log)
wdi.head()
Out[205]:
country year NY.GDP.PCAP.PP.KD NY.GDP.PCAP.KD SL.GDP.PCAP.EM.KD SP.POP.GROW SP.POP.TOTL SP.DYN.WFRT SP.DYN.TFRT.IN gdp_pc ln_gdp_pc ln_pop
0 Africa Eastern and Southern 2020 3491.978104 1452.730251 9261.307330 2.605427 677243299.0 NaN 4.281856 3491.978104 8.158224 20.333541
1 Africa Eastern and Southern 2019 3684.315172 1534.890147 9486.431025 2.636666 660046272.0 3.884984 4.349436 3684.315172 8.211840 20.307820
2 Africa Eastern and Southern 2018 3697.317275 1544.077975 9546.202255 2.665620 643090131.0 NaN 4.420264 3697.317275 8.215363 20.281795
3 Africa Eastern and Southern 2017 3694.912741 1546.795571 9575.237978 2.690902 626392880.0 NaN 4.493744 3694.912741 8.214712 20.255488
4 Africa Eastern and Southern 2016 3691.284532 1548.813076 9602.411014 2.712218 609978946.0 NaN 4.569675 3691.284532 8.213730 20.228935
In [206]:
wdi = wbcountries.merge(wdi, left_on='name', right_on='country')
wdi.head()
Out[206]:
iso3c iso2c name region adminregion incomeLevel lendingType capitalCity longitude latitude ... NY.GDP.PCAP.PP.KD NY.GDP.PCAP.KD SL.GDP.PCAP.EM.KD SP.POP.GROW SP.POP.TOTL SP.DYN.WFRT SP.DYN.TFRT.IN gdp_pc ln_gdp_pc ln_pop
0 ABW AW Aruba Latin America & Caribbean High Income Not classified Oranjestad -70.0167 12.5167 ... 29563.756955 23026.332866 NaN 0.428017 106766.0 NaN 1.901 29563.756955 10.294304 11.578395
1 ABW AW Aruba Latin America & Caribbean High Income Not classified Oranjestad -70.0167 12.5167 ... 38221.117314 29769.293907 NaN 0.437415 106310.0 NaN 1.901 38221.117314 10.551143 11.574115
2 ABW AW Aruba Latin America & Caribbean High Income Not classified Oranjestad -70.0167 12.5167 ... 39206.356147 30536.667193 NaN 0.459266 105846.0 NaN 1.896 39206.356147 10.576594 11.569740
3 ABW AW Aruba Latin America & Caribbean High Income Not classified Oranjestad -70.0167 12.5167 ... 38893.960556 30293.351539 NaN 0.471874 105361.0 NaN 1.886 38893.960556 10.568594 11.565148
4 ABW AW Aruba Latin America & Caribbean High Income Not classified Oranjestad -70.0167 12.5167 ... 37046.877414 28854.713299 NaN 0.502860 104865.0 NaN 1.872 37046.877414 10.519939 11.560429

5 rows × 22 columns

Exercise 2: Using the my_xy_plot function plot the relation between GDP per capita and total patents in the years 1990, 1995, 2000, 2010, 2020.
In [214]:
g = my_xy_plot(full_merged, 
               x='gdp_pc', 
               y='1990_x', 
               xlabel='GDP per capita',
               ylabel='Total Patents',
               OLS=True,
               labels=True, 
               #size="ln_pop", 
               #sizes=(10, 400), 
               filename='ln-gdp-pc-latitude.pdf')
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [214], in <cell line: 1>()
----> 1 g = my_xy_plot(full_merged, 
      2                x='gdp_pc', 
      3                y='1990_x', 
      4                xlabel='GDP per capita',
      5                ylabel='Total Patents',
      6                OLS=True,
      7                labels=True, 
      8                #size="ln_pop", 
      9                #sizes=(10, 400), 
     10                filename='ln-gdp-pc-latitude.pdf')

Input In [55], in my_xy_plot(dfin, x, y, labelvar, dx, dy, xlogscale, ylogscale, xlabel, ylabel, labels, xpct, ypct, OLS, OLSlinelabel, ssline, sslinelabel, filename, hue, hue_order, style, style_order, palette, size, sizes, legend_fontsize, label_font_size, save)
     41 sns.scatterplot(x=x, y=y, data=df, ax=ax, 
     42                 hue=hue,
     43                 hue_order=hue_order,
   (...)
     55                 #palette=sns.color_palette("Blues_r", df[hue].unique().shape[0]+6)[:df[hue].unique().shape[0]*2:2],
     56             )
     57 if OLS:
---> 58     sns.regplot(x=x, y=y, data=df, ax=ax, label=OLSlinelabel, scatter=False)
     59 if ssline:
     60     ax.plot([df[x].min()*.99, df[x].max()*1.01], [df[x].min()*.99, df[x].max()*1.01], c='r', label=sslinelabel)

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/_decorators.py:46, in _deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
     36     warnings.warn(
     37         "Pass the following variable{} as {}keyword arg{}: {}. "
     38         "From version 0.12, the only valid positional argument "
   (...)
     43         FutureWarning
     44     )
     45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:863, in regplot(x, y, data, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, seed, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, dropna, x_jitter, y_jitter, label, color, marker, scatter_kws, line_kws, ax)
    861 scatter_kws["marker"] = marker
    862 line_kws = {} if line_kws is None else copy.copy(line_kws)
--> 863 plotter.plot(ax, scatter_kws, line_kws)
    864 return ax

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:370, in _RegressionPlotter.plot(self, ax, scatter_kws, line_kws)
    367     self.scatterplot(ax, scatter_kws)
    369 if self.fit_reg:
--> 370     self.lineplot(ax, line_kws)
    372 # Label the axes
    373 if hasattr(self.x, "name"):

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:413, in _RegressionPlotter.lineplot(self, ax, kws)
    411 """Draw the model."""
    412 # Fit the regression model
--> 413 grid, yhat, err_bands = self.fit_regression(ax)
    414 edges = grid[0], grid[-1]
    416 # Get set default aesthetics

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:221, in _RegressionPlotter.fit_regression(self, ax, x_range, grid)
    219     yhat, yhat_boots = self.fit_logx(grid)
    220 else:
--> 221     yhat, yhat_boots = self.fit_fast(grid)
    223 # Compute the confidence interval at each grid point
    224 if ci is None:

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:238, in _RegressionPlotter.fit_fast(self, grid)
    236 X, y = np.c_[np.ones(len(self.x)), self.x], self.y
    237 grid = np.c_[np.ones(len(grid)), grid]
--> 238 yhat = grid.dot(reg_func(X, y))
    239 if self.ci is None:
    240     return yhat, None

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:234, in _RegressionPlotter.fit_fast.<locals>.reg_func(_x, _y)
    233 def reg_func(_x, _y):
--> 234     return np.linalg.pinv(_x).dot(_y)

TypeError: can't multiply sequence by non-int of type 'float'
In [216]:
g = my_xy_plot(full_merged, 
               x='gdp_pc', 
               y='2000_x', 
               xlabel='GDP per capita',
               ylabel='Total Patents',
               OLS=True,
               labels=True, 
               #size="ln_pop", 
               #sizes=(10, 400), 
               filename='ln-gdp-pc-latitude.pdf')
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [216], in <cell line: 1>()
----> 1 g = my_xy_plot(full_merged, 
      2                x='gdp_pc', 
      3                y='2000_x', 
      4                xlabel='GDP per capita',
      5                ylabel='Total Patents',
      6                OLS=True,
      7                labels=True, 
      8                #size="ln_pop", 
      9                #sizes=(10, 400), 
     10                filename='ln-gdp-pc-latitude.pdf')

Input In [55], in my_xy_plot(dfin, x, y, labelvar, dx, dy, xlogscale, ylogscale, xlabel, ylabel, labels, xpct, ypct, OLS, OLSlinelabel, ssline, sslinelabel, filename, hue, hue_order, style, style_order, palette, size, sizes, legend_fontsize, label_font_size, save)
     41 sns.scatterplot(x=x, y=y, data=df, ax=ax, 
     42                 hue=hue,
     43                 hue_order=hue_order,
   (...)
     55                 #palette=sns.color_palette("Blues_r", df[hue].unique().shape[0]+6)[:df[hue].unique().shape[0]*2:2],
     56             )
     57 if OLS:
---> 58     sns.regplot(x=x, y=y, data=df, ax=ax, label=OLSlinelabel, scatter=False)
     59 if ssline:
     60     ax.plot([df[x].min()*.99, df[x].max()*1.01], [df[x].min()*.99, df[x].max()*1.01], c='r', label=sslinelabel)

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/_decorators.py:46, in _deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
     36     warnings.warn(
     37         "Pass the following variable{} as {}keyword arg{}: {}. "
     38         "From version 0.12, the only valid positional argument "
   (...)
     43         FutureWarning
     44     )
     45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:863, in regplot(x, y, data, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, seed, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, dropna, x_jitter, y_jitter, label, color, marker, scatter_kws, line_kws, ax)
    861 scatter_kws["marker"] = marker
    862 line_kws = {} if line_kws is None else copy.copy(line_kws)
--> 863 plotter.plot(ax, scatter_kws, line_kws)
    864 return ax

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:370, in _RegressionPlotter.plot(self, ax, scatter_kws, line_kws)
    367     self.scatterplot(ax, scatter_kws)
    369 if self.fit_reg:
--> 370     self.lineplot(ax, line_kws)
    372 # Label the axes
    373 if hasattr(self.x, "name"):

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:413, in _RegressionPlotter.lineplot(self, ax, kws)
    411 """Draw the model."""
    412 # Fit the regression model
--> 413 grid, yhat, err_bands = self.fit_regression(ax)
    414 edges = grid[0], grid[-1]
    416 # Get set default aesthetics

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:221, in _RegressionPlotter.fit_regression(self, ax, x_range, grid)
    219     yhat, yhat_boots = self.fit_logx(grid)
    220 else:
--> 221     yhat, yhat_boots = self.fit_fast(grid)
    223 # Compute the confidence interval at each grid point
    224 if ci is None:

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:238, in _RegressionPlotter.fit_fast(self, grid)
    236 X, y = np.c_[np.ones(len(self.x)), self.x], self.y
    237 grid = np.c_[np.ones(len(grid)), grid]
--> 238 yhat = grid.dot(reg_func(X, y))
    239 if self.ci is None:
    240     return yhat, None

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:234, in _RegressionPlotter.fit_fast.<locals>.reg_func(_x, _y)
    233 def reg_func(_x, _y):
--> 234     return np.linalg.pinv(_x).dot(_y)

TypeError: can't multiply sequence by non-int of type 'float'
In [218]:
g = my_xy_plot(full_merged, 
               x='gdp_pc', 
               y='2020_x', 
               xlabel='GDP per capita',
               ylabel='Total Patents',
               OLS=True,
               labels=True, 
               #size="ln_pop", 
               #sizes=(10, 400), 
               filename='ln-gdp-pc-latitude.pdf')
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [218], in <cell line: 1>()
----> 1 g = my_xy_plot(full_merged, 
      2                x='gdp_pc', 
      3                y='2020_x', 
      4                xlabel='GDP per capita',
      5                ylabel='Total Patents',
      6                OLS=True,
      7                labels=True, 
      8                #size="ln_pop", 
      9                #sizes=(10, 400), 
     10                filename='ln-gdp-pc-latitude.pdf')

Input In [55], in my_xy_plot(dfin, x, y, labelvar, dx, dy, xlogscale, ylogscale, xlabel, ylabel, labels, xpct, ypct, OLS, OLSlinelabel, ssline, sslinelabel, filename, hue, hue_order, style, style_order, palette, size, sizes, legend_fontsize, label_font_size, save)
     41 sns.scatterplot(x=x, y=y, data=df, ax=ax, 
     42                 hue=hue,
     43                 hue_order=hue_order,
   (...)
     55                 #palette=sns.color_palette("Blues_r", df[hue].unique().shape[0]+6)[:df[hue].unique().shape[0]*2:2],
     56             )
     57 if OLS:
---> 58     sns.regplot(x=x, y=y, data=df, ax=ax, label=OLSlinelabel, scatter=False)
     59 if ssline:
     60     ax.plot([df[x].min()*.99, df[x].max()*1.01], [df[x].min()*.99, df[x].max()*1.01], c='r', label=sslinelabel)

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/_decorators.py:46, in _deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
     36     warnings.warn(
     37         "Pass the following variable{} as {}keyword arg{}: {}. "
     38         "From version 0.12, the only valid positional argument "
   (...)
     43         FutureWarning
     44     )
     45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:863, in regplot(x, y, data, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, seed, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, dropna, x_jitter, y_jitter, label, color, marker, scatter_kws, line_kws, ax)
    861 scatter_kws["marker"] = marker
    862 line_kws = {} if line_kws is None else copy.copy(line_kws)
--> 863 plotter.plot(ax, scatter_kws, line_kws)
    864 return ax

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:370, in _RegressionPlotter.plot(self, ax, scatter_kws, line_kws)
    367     self.scatterplot(ax, scatter_kws)
    369 if self.fit_reg:
--> 370     self.lineplot(ax, line_kws)
    372 # Label the axes
    373 if hasattr(self.x, "name"):

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:413, in _RegressionPlotter.lineplot(self, ax, kws)
    411 """Draw the model."""
    412 # Fit the regression model
--> 413 grid, yhat, err_bands = self.fit_regression(ax)
    414 edges = grid[0], grid[-1]
    416 # Get set default aesthetics

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:221, in _RegressionPlotter.fit_regression(self, ax, x_range, grid)
    219     yhat, yhat_boots = self.fit_logx(grid)
    220 else:
--> 221     yhat, yhat_boots = self.fit_fast(grid)
    223 # Compute the confidence interval at each grid point
    224 if ci is None:

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:238, in _RegressionPlotter.fit_fast(self, grid)
    236 X, y = np.c_[np.ones(len(self.x)), self.x], self.y
    237 grid = np.c_[np.ones(len(grid)), grid]
--> 238 yhat = grid.dot(reg_func(X, y))
    239 if self.ci is None:
    240     return yhat, None

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/seaborn/regression.py:234, in _RegressionPlotter.fit_fast.<locals>.reg_func(_x, _y)
    233 def reg_func(_x, _y):
--> 234     return np.linalg.pinv(_x).dot(_y)

TypeError: can't multiply sequence by non-int of type 'float'
Exercise 3: Using the my_xy_line_plot function plot the evolution of GDP per capita and total patents by income groups and regions (separate figures).
In [221]:
def my_xy_line_plot(dfin, 
                    x='year', 
                    y='ln_gdp_pc', 
                    labelvar='iso3c', 
                    dx=0.006125, 
                    dy=0.006125, 
                    xlogscale=False, 
                    ylogscale=False,
                    xlabel='Growth Rate of Population', 
                    ylabel='Log[Income per capita in ' +  str(year) + ']',
                    labels=False,
                    xpct = False,
                    ypct = False,
                    OLS=False,
                    OLSlinelabel='OLS',
                    ssline=False,
                    sslinelabel='45 Degree Line',
                    filename='income-pop-growth.pdf',
                    hue='region',
                    hue_order=['East Asia & Pacific', 'Europe & Central Asia',
                               'Latin America & Caribbean ', 'Middle East & North Africa',
                               'North America', 'South Asia', 'Sub-Saharan Africa '],
                    style='incomeLevel', 
                    style_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'],
                    palette=None,
                    legend_fontsize=10,
                    label_fontsize=12,
                    loc=None,
                    save=True):
    '''
    Plot the association between x and var in dataframe using labelvar for labels. 
    '''
    sns.set(rc={'figure.figsize':(11.7,8.27)})
    sns.set_context("talk")
    df = dfin.copy()
    df = df.dropna(subset=[x, y]).reset_index(drop=True)
    # Plot
    k = 0
    fig, ax = plt.subplots()
    sns.lineplot(x=x, y=y, data=df, ax=ax, 
                    hue=hue,
                    hue_order=hue_order,
                    alpha=1, 
                    style=style, 
                    style_order=style_order,
                    palette=palette,
                )
    if OLS:
        sns.regplot(x=x, y=y, data=df, ax=ax, label=OLSlinelabel, scatter=False)
    if ssline:
        ax.plot([df[x].min()*.99, df[x].max()*1.01], [df[x].min()*.99, df[x].max()*1.01], c='r', label=sslinelabel)
    if labels:
        movex = df[x].mean() * dx
        movey = df[y].mean() * dy
        for line in range(0,df.shape[0]):
            ax.text(df[x][line]+movex, df[y][line]+movey, df[labelvar][line], horizontalalignment='left', fontsize=label_fontsize, color='black')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    if xpct:
        fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
        xticks = mtick.FormatStrFormatter(fmt)
        ax.xaxis.set_major_formatter(xticks)
    if ypct:
        fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
        yticks = mtick.FormatStrFormatter(fmt)
        ax.yaxis.set_major_formatter(yticks)
    if ylogscale:
        ax.set(yscale="log")
    if xlogscale:
        ax.set(xscale="log")
    handles, labels = ax.get_legend_handles_labels()
    handles = np.array(handles)
    labels = np.array(labels)
    handles = list(handles[(labels!='region') & (labels!='incomeLevel')])
    labels = list(labels[(labels!='region') & (labels!='incomeLevel')])
    ax.legend(handles=handles, labels=labels, fontsize=legend_fontsize, loc=loc)
    if save:
        plt.savefig(pathgraphs + filename, dpi=300, bbox_inches='tight')
    return fig
In [222]:
palette=sns.color_palette("Blues_r", wdi['incomeLevel'].unique().shape[0]+6)[:wdi['incomeLevel'].unique().shape[0]*2:2]
fig = my_xy_line_plot(full_merged, 
                x='gdp_pc', 
                y='2020_x', 
                xlabel='Year',
                ylabel='Log[GDP per capita]',
                filename='ln-gdp-pc-income-groups-TS.pdf',
                hue='incomeLevel',
                hue_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'],
                palette=palette,
                OLS=False, 
                labels=False,
                legend_fontsize=16,
                loc='lower right',
                save=True)
Exercise 4: Plot the relation between patenting activity by residents and non-residents in the year 2015. Make sure to show the 45 degree line so you can see how similar they are.
In [231]:
symbols = ['circle', 'x', 'square', 'cross', 'diamond', 'star-diamond', 'triangle-up']
fig = px.scatter(full_merged,
                 x="2022_x", 
                 y="2022_y", 
                 color='region',
                 symbol='region',
                 symbol_sequence=symbols,
                 hover_name='Country',
                 size='ln_pop',
                 size_max=15,
                 trendline="ols",
                 trendline_scope="overall",
                 trendline_color_override="black",
                 labels={
                     "latitude": "Latitude",
                     "ln_gdp_pc": "Log[GDP per capita (" + str(year) + ")]",
                     "gdp_pc": "GDP per capita (" + str(year) + ")",
                     "region": "WB Region"
                 },
                 opacity=0.75,
                 height=800,
                )
fig
Exercise 5: Create a static and a dynamic map for patenting activity in the year 2015 across the world.
In [238]:
# Static
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'}

url = 'https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip'
r = requests.get(url, headers=headers)
countries = gp.read_file(io.BytesIO(r.content))
fig, ax = plt.subplots(figsize=(15,10))
countries.plot(ax=ax)
ax.set_title("Exercise 5", fontdict={'fontsize':34})
gplt.polyplot(
    full_merged, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
    edgecolor='white', facecolor='lightgray',
    rasterized=True,
    extent=[-180, -90, 180, 90],
)
gplt.choropleth(full_merged, hue='2015_x', 
                projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
                edgecolor='white', 
                linewidth=1,
                cmap='Reds', legend=True,
                scheme='FisherJenks',
                legend_kwargs={'bbox_to_anchor':(0.3, 0.5),
                               'frameon': True,
                               'title':'GDP per capita',
                              },
                figsize=(12,8),
                rasterized=True,
               )
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [238], in <cell line: 10>()
      8 countries.plot(ax=ax)
      9 ax.set_title("Exercise 5", fontdict={'fontsize':34})
---> 10 gplt.polyplot(
     11     full_merged, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
     12     edgecolor='white', facecolor='lightgray',
     13     rasterized=True,
     14     extent=[-180, -90, 180, 90],
     15 )
     16 gplt.choropleth(full_merged, hue='2015_x', 
     17                 projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
     18                 edgecolor='white', 
   (...)
     27                 rasterized=True,
     28                )

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/geoplot/geoplot.py:900, in polyplot(df, projection, extent, figsize, ax, **kwargs)
    896                     ax.add_patch(feature)
    898         return ax
--> 900 plot = PolyPlot(df, figsize=figsize, ax=ax, extent=extent, projection=projection, **kwargs)
    901 return plot.draw()

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/geoplot/geoplot.py:862, in polyplot.<locals>.PolyPlot.__init__(self, df, **kwargs)
    861 def __init__(self, df, **kwargs):
--> 862     super().__init__(df, **kwargs)

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/geoplot/geoplot.py:605, in Plot.__init__(self, df, **kwargs)
    599 def __init__(self, df, **kwargs):
    600     if not hasattr(df, 'geometry'):
    601         # The two valid df types are GeoDataFrame and GeoSeries. The former may be missing
    602         # a geometry column, depending on how it was initialized. The latter always returns
    603         # self when it is asked for its geometry property, and so it will never be the source
    604         # of this error.
--> 605         raise ValueError(
    606             'The input GeoDataFrame does not have a "geometry" column set.'
    607         )
    608     self.df = df
    610     if kwargs['ax'] is None:
    611         # a default figsize is always set and passed into the initializer

ValueError: The input GeoDataFrame does not have a "geometry" column set.
In [243]:
# Dynamic
scheme = mc.Quantiles(full_merged['2015_x'], k=5)
classifier = mc.Quantiles.make(k=5, rolling=True)
full_merged['2015_x'] = classifier(full_merged['2015_x'])
full_merged['2015_x'] = full_merged['2015_x'].apply(lambda x: scheme.get_legend_classes()[x].replace('[   ', '[').replace('( ', '('))
fig = px.choropleth(dffig2.sort_values('gdp_pc_q', ascending=True), 
                    locations="iso3c",
                    color="gdp_pc_qc",
                    hover_name='name',
                    hover_data=['iso3c', 'ln_pop'],
                    labels={
                        "gdp_pc_qc": "GDP per capita (" + str(year) + ")",
                    },
                    color_discrete_sequence=px.colors.sequential.Reds,
                    height=600, 
                    width=1000,
                   )
# Change legend position
fig.update_layout(legend=dict(
    yanchor="bottom",
    y=0.15,
    xanchor="left",
    x=0.05
))
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [243], in <cell line: 2>()
      1 # Dynamic
----> 2 scheme = mc.Quantiles(full_merged['2015_x'], k=5)
      3 classifier = mc.Quantiles.make(k=5, rolling=True)
      4 full_merged['2015_x'] = classifier(full_merged['2015_x'])

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/mapclassify/classifiers.py:1460, in Quantiles.__init__(self, y, k)
   1458 def __init__(self, y, k=K):
   1459     self.k = k
-> 1460     MapClassifier.__init__(self, y)
   1461     self.name = "Quantiles"

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/mapclassify/classifiers.py:617, in MapClassifier.__init__(self, y)
    615 self.fmt = FMT
    616 self.y = y
--> 617 self._classify()
    618 self._summary()

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/mapclassify/classifiers.py:636, in MapClassifier._classify(self)
    635 def _classify(self):
--> 636     self._set_bins()
    637     self.yb, self.counts = bin1d(self.y, self.bins)

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/mapclassify/classifiers.py:1466, in Quantiles._set_bins(self)
   1464 y = self.y
   1465 k = self.k
-> 1466 self.bins = quantile(y, k=k)

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/mapclassify/classifiers.py:234, in quantile(y, k)
    232 if p[-1] > 100.0:
    233     p[-1] = 100.0
--> 234 q = np.array([stats.scoreatpercentile(y, pct) for pct in p])
    235 q = np.unique(q)
    236 k_q = len(q)

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/mapclassify/classifiers.py:234, in <listcomp>(.0)
    232 if p[-1] > 100.0:
    233     p[-1] = 100.0
--> 234 q = np.array([stats.scoreatpercentile(y, pct) for pct in p])
    235 q = np.unique(q)
    236 k_q = len(q)

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/scipy/stats/_stats_py.py:2013, in scoreatpercentile(a, per, limit, interpolation_method, axis)
   2010 if axis is None:
   2011     axis = 0
-> 2013 return _compute_qth_percentile(sorted_, per, interpolation_method, axis)

File ~/opt/anaconda3/envs/EconGrowthUG-Builds/lib/python3.9/site-packages/scipy/stats/_stats_py.py:2057, in _compute_qth_percentile(sorted_, per, interpolation_method, axis)
   2054     sumval = weights.sum()
   2056 # Use np.add.reduce (== np.sum but a little faster) to coerce data type
-> 2057 return np.add.reduce(sorted_[tuple(indexer)] * weights, axis=axis) / sumval

TypeError: can't multiply sequence by non-int of type 'float'
Exercise 6: Explore the relation between economic development as measured by Log[GDP per capita] and patenting activity. Show the relation for residents, non-residents, and total, all in one nice looking table. Also, produce a few nice looking figures.
In [244]:
full_merged
Out[244]:
Country 1990_x 2000_x 2012_x 2013_x 2014_x 2015_x 2016_x 2017_x 2018_x ... NY.GDP.PCAP.PP.KD NY.GDP.PCAP.KD SL.GDP.PCAP.EM.KD SP.POP.GROW SP.POP.TOTL SP.DYN.WFRT SP.DYN.TFRT.IN gdp_pc ln_gdp_pc ln_pop
0 Afghanistan .. .. .. .. .. .. .. .. .. ... 1970.560169 529.741210 9226.547100 2.303812 38928341.0 NaN 4.176 1970.560169 7.586073 17.477233
1 Afghanistan .. .. .. .. .. .. .. .. .. ... 2065.036235 555.138996 8522.606006 2.313073 38041757.0 NaN 4.321 2065.036235 7.632903 17.454195
2 Afghanistan .. .. .. .. .. .. .. .. .. ... 2033.804389 546.743010 8490.085702 2.384309 37171922.0 NaN 4.473 2033.804389 7.617663 17.431064
3 Afghanistan .. .. .. .. .. .. .. .. .. ... 2058.400221 553.355052 8698.017169 2.547833 36296111.0 NaN 4.633 2058.400221 7.629684 17.407221
4 Afghanistan .. .. .. .. .. .. .. .. .. ... 2057.067978 552.996908 8795.234080 2.778035 35383028.0 NaN 4.800 2057.067978 7.629037 17.381743
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3045 Cuba .. .. 38.0 27.0 24.0 .. 32.0 29.0 29.0 ... NaN NaN NaN 2.204965 7793258.0 NaN 4.600 NaN NaN 15.868770
3046 Cuba .. .. 38.0 27.0 24.0 .. 32.0 29.0 29.0 ... NaN NaN NaN 2.252093 7623300.0 NaN 4.584 NaN NaN 15.846720
3047 Cuba .. .. 38.0 27.0 24.0 .. 32.0 29.0 29.0 ... NaN NaN NaN 2.202014 7453535.0 NaN 4.502 NaN NaN 15.824199
3048 Cuba .. .. 38.0 27.0 24.0 .. 32.0 29.0 29.0 ... NaN NaN NaN 2.078171 7291201.0 NaN 4.365 NaN NaN 15.802179
3049 Cuba .. .. 38.0 27.0 24.0 .. 32.0 29.0 29.0 ... NaN NaN NaN NaN 7141241.0 NaN 4.192 NaN NaN 15.781397

3050 rows × 49 columns